# Load the raw Valenbisi records; the CSV uses ";" as its field separator.
data <- read.csv(file = "../ValenbisiData/Valenbisi.csv", sep = ";")
# Preview the first rows to sanity-check the import.
head(data)
##         Date Hour Weekday Is_holiday Id_station Open Available Free Longuitud
## 1 01-12-2022    2     Thu      False          1 True         1   24  39.48004
## 2 01-12-2022    2     Thu      False          2 True         1   14  39.47989
## 3 01-12-2022    2     Thu      False          3 True         5   15  39.47684
## 4 01-12-2022    2     Thu      False          4 True         6   18  39.47675
## 5 01-12-2022    2     Thu      False          5 True        14    1  39.47690
## 6 01-12-2022    2     Thu      False          6 True         9    5  39.47280
##      Latitud
## 1 -0.3829293
## 2 -0.3797484
## 3 -0.3802884
## 4 -0.3753424
## 5 -0.3711404
## 6 -0.3840834
# Parse the day-month-year strings into Date objects.
data[["Date"]] <- as.Date(data[["Date"]], format = "%d-%m-%Y")
# Hour and station id are used as grouping categories, so store them as factors.
data[c("Hour", "Id_station")] <- lapply(
  data[c("Hour", "Id_station")],
  as.factor
)
# The holiday/open flags arrive as "True"/"False" text; turn them into logicals.
data[c("Is_holiday", "Open")] <- lapply(
  data[c("Is_holiday", "Open")],
  as.logical
)
# Preview again to confirm the converted column types.
head(data)
##         Date Hour Weekday Is_holiday Id_station Open Available Free Longuitud
## 1 2022-12-01    2     Thu      FALSE          1 TRUE         1   24  39.48004
## 2 2022-12-01    2     Thu      FALSE          2 TRUE         1   14  39.47989
## 3 2022-12-01    2     Thu      FALSE          3 TRUE         5   15  39.47684
## 4 2022-12-01    2     Thu      FALSE          4 TRUE         6   18  39.47675
## 5 2022-12-01    2     Thu      FALSE          5 TRUE        14    1  39.47690
## 6 2022-12-01    2     Thu      FALSE          6 TRUE         9    5  39.47280
##      Latitud
## 1 -0.3829293
## 2 -0.3797484
## 3 -0.3802884
## 4 -0.3753424
## 5 -0.3711404
## 6 -0.3840834
# Per-column overview of the converted data frame.
# NOTE(review): in the output below, Longuitud ranges over roughly 39.44-39.50
# and Latitud over roughly -0.43 to -0.32; the two column names look swapped
# relative to the usual latitude/longitude convention — confirm against the
# data source before using them as coordinates.
summary(data)
##       Date                 Hour           Weekday          Is_holiday     
##  Min.   :2022-12-01   6      : 112332   Length:2662109     Mode :logical  
##  1st Qu.:2022-12-27   3      : 112145   Class :character   FALSE:2190149  
##  Median :2023-01-21   7      : 112056   Mode  :character   TRUE :471960   
##  Mean   :2023-01-21   1      : 111504                                     
##  3rd Qu.:2023-02-16   2      : 111504                                     
##  Max.   :2023-03-13   10     : 111504                                     
##                       (Other):1991064                                     
##    Id_station         Open           Available           Free      
##  3      :   9646   Mode :logical   Min.   : 0.000   Min.   : 0.00  
##  4      :   9646   FALSE:9645      1st Qu.: 2.000   1st Qu.: 6.00  
##  11     :   9646   TRUE :2652464   Median : 7.000   Median :12.00  
##  12     :   9646                   Mean   : 7.779   Mean   :11.68  
##  15     :   9646                   3rd Qu.:12.000   3rd Qu.:16.00  
##  17     :   9646                   Max.   :40.000   Max.   :40.00  
##  (Other):2604233                                                   
##    Longuitud        Latitud       
##  Min.   :39.44   Min.   :-0.4263  
##  1st Qu.:39.46   1st Qu.:-0.3897  
##  Median :39.47   Median :-0.3733  
##  Mean   :39.47   Mean   :-0.3712  
##  3rd Qu.:39.48   3rd Qu.:-0.3541  
##  Max.   :39.50   Max.   :-0.3234  
## 
# One row per Date with the mean and the sum of the Available and Free columns.
# The .names pattern yields AvailableMean, AvailableSum, FreeMean, FreeSum.
data_date <- data %>%
  group_by(Date) %>%
  summarise(across(c(Available, Free),
                   list(Mean = mean, Sum = sum),
                   .names = "{.col}{.fn}"))

# Daily means of both columns on a shared date axis.
plot_ly(data_date, x = ~Date) %>%
  add_lines(y = ~AvailableMean, name = "Available", color = I("blue")) %>%
  add_lines(y = ~FreeMean, name = "Free", color = I("orange")) %>%
  layout(title = "Media por días")

# Daily totals of both columns on a shared date axis.
plot_ly(data_date, x = ~Date) %>%
  add_lines(y = ~AvailableSum, name = "Available", color = I("blue")) %>%
  add_lines(y = ~FreeSum, name = "Free", color = I("orange")) %>%
  layout(title = "Suma por días")
# One row per Hour level with the mean and the sum of Available and Free.
# The .names pattern yields AvailableMean, AvailableSum, FreeMean, FreeSum.
data_hour <- data %>%
  group_by(Hour) %>%
  summarise(across(c(Available, Free),
                   list(Mean = mean, Sum = sum),
                   .names = "{.col}{.fn}"))

# Hourly means of both columns.
plot_ly(data_hour, x = ~Hour) %>%
  add_lines(y = ~AvailableMean, name = "Available", color = I("blue")) %>%
  add_lines(y = ~FreeMean, name = "Free", color = I("orange")) %>%
  layout(title = "Media por horas")

# Hourly totals of both columns.
plot_ly(data_hour, x = ~Hour) %>%
  add_lines(y = ~AvailableSum, name = "Available", color = I("blue")) %>%
  add_lines(y = ~FreeSum, name = "Free", color = I("orange")) %>%
  layout(title = "Suma por horas")
# One row per Weekday with the mean and the sum of Available and Free.
# Weekday is then turned into an ordered factor (Mon..Sun) and the rows are
# sorted by it, so the summary follows calendar order, not alphabetical order.
data_day <- data %>%
  group_by(Weekday) %>%
  summarise(across(c(Available, Free),
                   list(Mean = mean, Sum = sum),
                   .names = "{.col}{.fn}")) %>%
  mutate(Weekday = factor(
    Weekday,
    levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"),
    ordered = TRUE
  )) %>%
  arrange(Weekday)

# Means per day of the week.
plot_ly(data_day, x = ~Weekday) %>%
  add_lines(y = ~AvailableMean, name = "Available", color = I("blue")) %>%
  add_lines(y = ~FreeMean, name = "Free", color = I("orange")) %>%
  layout(title = "Media por día de la semana")

# Totals per day of the week.
plot_ly(data_day, x = ~Weekday) %>%
  add_lines(y = ~AvailableSum, name = "Available", color = I("blue")) %>%
  add_lines(y = ~FreeSum, name = "Free", color = I("orange")) %>%
  layout(title = "Suma por día de la semana")
# One row per Is_holiday value with the mean and the sum of Available and Free.
# The .names pattern yields AvailableMean, AvailableSum, FreeMean, FreeSum.
data_holiday <- data %>%
  group_by(Is_holiday) %>%
  summarise(across(c(Available, Free),
                   list(Mean = mean, Sum = sum),
                   .names = "{.col}{.fn}"))

# Means for holiday vs. non-holiday rows, drawn as bars.
plot_ly(data_holiday, x = ~Is_holiday) %>%
  add_bars(y = ~AvailableMean, name = "Available", color = I("blue")) %>%
  add_bars(y = ~FreeMean, name = "Free", color = I("orange")) %>%
  layout(title = "Media por día festivo")

# Totals for holiday vs. non-holiday rows, drawn as bars.
plot_ly(data_holiday, x = ~Is_holiday) %>%
  add_bars(y = ~AvailableSum, name = "Available", color = I("blue")) %>%
  add_bars(y = ~FreeSum, name = "Free", color = I("orange")) %>%
  layout(title = "Suma por día festivo")
# One row per Id_station with the mean and the sum of Available and Free,
# sorted by station id.
data_station <- data %>%
  group_by(Id_station) %>%
  summarise(AvailableMean = mean(Available),
            AvailableSum = sum(Available),
            FreeMean = mean(Free),
            FreeSum = sum(Free)) %>%
  arrange(Id_station)

# Means per station. The title previously read "Media por horas", copied from
# the hourly plot; it now names the dimension actually on the x axis.
plot_ly(data_station) %>%
  add_lines(x = ~Id_station, y = ~AvailableMean, color = I("blue"), name = "Available") %>%
  add_lines(x = ~Id_station, y = ~FreeMean, color = I("orange"), name = "Free") %>%
  layout(title = "Media por estación")

# Totals per station. The title previously read "Suma por horas" for the same
# copy-paste reason.
plot_ly(data_station) %>%
  add_lines(x = ~Id_station, y = ~AvailableSum, color = I("blue"), name = "Available") %>%
  add_lines(x = ~Id_station, y = ~FreeSum, color = I("orange"), name = "Free") %>%
  layout(title = "Suma por estación")